Load Data - 5/24

df <- read.csv("~/sdal/projects/limbo/ArlingtonSchoolProfiles.csv", stringsAsFactors = TRUE)
head(df)

library(rio)
df <- import("~/sdal/projects/limbo/ArlingtonSchoolProfiles.csv")

# LOAD DATA FROM URL/GITHUB
library(RCurl)
url <- getURL("https://jaredlander.com/data/Fake%20Pizza%20Data.csv")
data <- read.csv(textConnection(url))

# LOAD DATA FROM JSON
library(jsonlite)

citibike <- fromJSON("http://citibikenyc.com/stations/json")
  class(citibike) # list
  class(citibike$executionTime) # character
  class(citibike$stationBeanList) # data.frame

stations <- citibike$stationBeanList
colnames(stations)

# FCC LOCATION API EXAMPLE
library(jsonlite)
library(data.table)

FCClocation2FIPS <- function(place_id="VTRC", lon=-77.11577, lat=38.880807) {
  res <- fromJSON(paste0("http://data.fcc.gov/api/block/find?format=json&latitude=", lat, "&longitude=", lon, "&showall=true&format=JSON"))
  data.table(place_id = place_id, 
             state_name = res$State$name, 
             state_fips = res$State$FIPS, 
             county_name = res$County$name, 
             county_fips = res$County$FIPS, 
             block_fips = res$Block$FIPS)
}

FCClocations2FIPS <- function(place_idCol = c("VTRC", "VT-NVC"), lonCol = c(-77.11577, -77.1894815), latCol = c(38.880807, 38.8968325)) {
  res <- as.data.table(t(mapply(FCClocation2FIPS, place_idCol, lonCol, latCol)))
  data.table(place_id = unlist(res$place_id), 
             state_name = unlist(res$state_name), 
             state_fips = unlist(res$state_fips), 
             county_name = unlist(res$county_name), 
             county_fips = unlist(res$county_fips), 
             block_fips = unlist(res$block_fips))
}

# GOOGLE RADAR SEARCH EXAMPLE
radar_search <- fromJSON(paste0("https://maps.googleapis.com/maps/api/place/radarsearch/json?location=", 37.231264, ",", -80.426745, "&radius=1609&type=grocery_or_supermarket&key=AIzaSyC9FKW-kjQlEXjfM3OgMZBJ7xE6zCN1JQI"))

place_id <- "ChIJEauYKUKVTYgR03YCLT7SLeY"

url <- paste0("https://maps.googleapis.com/maps/api/place/details/json?placeid=",
              place_id,
              "&key=AIzaSyC9FKW-kjQlEXjfM3OgMZBJ7xE6zCN1JQI")

detail_search <- fromJSON(url)
head(detail_search)

Functions - 5/25

# functions
function_name <- function(x,y){
  print(x*y)

}

function_name(2,3)
## [1] 6

Groupby - 5/30

head(mtcars)

# we want to know the average mpg for all clylinder types

# manual way 
mean(mtcars[mtcars$cyl==4, ]$mpg)
mean(mtcars[mtcars$cyl==6, ]$mpg)
mean(mtcars[mtcars$cyl==8, ]$mpg)

# groupby way
library(dplyr)
group_by(mtcars, cyl)
# lazy valuation = the only time a computer makes a calculation is when it is told to... so this group_by() function doesn't actually make R calculate anything.
summarize(group_by(mtcars, cyl), mpg_mean = mean(mpg)) # <------- group_by way
summarize(group_by(mtcars, cyl), mpg_mean = mean(mpg), mpg_sd = sd(mpg))

# piping way
mtcars %>% group_by(cyl) %>% summarize(mpg_mean = mean(mpg), mpg_sd = sd(mpg))

# ------------------------------------------------------------------------------------------------
# control flow
# if statements

if (TRUE) {
  print(3)
}
print(10)

if (FALSE) {
  print(3)
}
print(10)

# all of the cars with cylinder==4 returns 'numeric' for class, cylinder==6 returns '6', all others return 'NA'
my_function <- function(row_data){
  cylinder <- row_data['cyl']
  if (cylinder == 4){
    return(class(cylinder)) 
  }
  if (cylinder == 6){
    return(6)
  }
  return(NA)
}

apply(mtcars, MARGIN = 1, my_function) # 1 indicates row-by-row, 2 indicates column-by-column

# if, else...
another_function <- function(row_data){
  cylinder <- row_data['cyl']
  if (cylinder == 4){
    output <- 'buy me'
  }
  else {
    output <- 'sell me'
  }
  return(output)
}
apply(mtcars, MARGIN = 1, another_function)

# tidy data
library(tidyr)
# .... stopped paying attention

library(stringr)
load("~/sdal/projects/limbo/warTimes.rdata")
dash <- str_detect(warTimes, '-') # returns T/F
warTimes[dash] # returns strings containing a dash

the_times <- str_split(warTimes, "(ACAEA) | -", n = 2) # string "ACAEA" used to separate begin and end dates... separate columns by "ACAEA"
# "|" is an or condition (if it doesn't find first, it'll look for second...)
# n = 2 tells function that we only want it to split into 2 parts, so if it finds ACAEA and -, it shouldn't split it into 3 parts

# apply family
# lapply (always gives you a list)
# sapply (tries to give you same data format back)
extract_first <- function(x){
  return(x[1])
}

sapply(the_times, extract_first)

# OR

sapply(the_times, function(x) {x[1]})

start_dates <- sapply(the_times, function(x) {x[1]})

str_extract(start_dates, '[0-9][0-9][0-9][0-9]')

American Community Survey - 5/31

Plotting - 6/1

library(ggplot2)

hist(diamonds$price)

ggplot(diamonds, aes(x=price)) +geom_density()

ggplot(data= diamonds, mapping = aes(x = price, fill = cut)) + geom_histogram()

# discrete vars, use bar graphs
ggplot(data = diamonds, aes(x=cut, fill = color)) + geom_bar()

ggplot(diamonds) +
    geom_point(aes(x=carat, y=price, color = color)) +
    facet_wrap(~cut)

g <- ggplot(diamonds, aes(x=carat, y=price, fill = color)) + geom_hex()
g + theme_minimal()

library(ggthemes)
g <- ggplot(diamonds, aes(x=carat, y = price)) + geom_point()
g + theme_economist()

Data.Table - 6/2

library(data.table)
## Warning: package 'data.table' was built under R version 3.3.2
setwd("~/Documents/SDAL/R/R Practice/")

# download.file("http://download.geonames.org/export/zip/GB_full.csv.zip", "GB_full.csv.zip")
dt <- fread(unzip(zipfile = "~/Documents/SDAL/R/R Practice/GB_full.csv.zip"))
## 
Read 44.2% of 1720673 rows
Read 74.4% of 1720673 rows
Read 1720673 rows and 12 (of 12) columns from 0.214 GB file in 00:00:04
names(dt)
##  [1] "V1"  "V2"  "V3"  "V4"  "V5"  "V6"  "V7"  "V8"  "V9"  "V10" "V11"
## [12] "V12"
# subset columns
dt_subset_col <- dt[ , .(V2, V3, V4)] # instead of c(), do .()
head(dt_subset_col)
##          V2                     V3       V4
## 1:     AB10               Aberdeen Scotland
## 2: AB10 1AB George St/Harbour Ward Scotland
## 3: AB10 1AF George St/Harbour Ward Scotland
## 4: AB10 1AG George St/Harbour Ward Scotland
## 5: AB10 1AH George St/Harbour Ward Scotland
## 6: AB10 1AL George St/Harbour Ward Scotland
class(dt_subset_col)
## [1] "data.table" "data.frame"
df_subset <- as.data.frame(dt_subset_col)
rm("df_subset", "dt_subset_col")

# subset rows
dt_subset_row <- dt[V4 == "England", ]
head(dt_subset_row)
##    V1       V2        V3      V4  V5                   V6        V7
## 1: GB      AL1 St Albans England ENG        Hertfordshire       HRT
## 2: GB     AL10  Hatfield England ENG        Hertfordshire       HRT
## 3: GB AL10 0AA  Hatfield England ENG Hertfordshire County E10000015
## 4: GB AL10 0AB  Hatfield England ENG Hertfordshire County E10000015
## 5: GB AL10 0AD  Hatfield England ENG Hertfordshire County E10000015
## 6: GB AL10 0AE  Hatfield England ENG Hertfordshire County E10000015
##                              V8        V9      V10        V11 V12
## 1:                                              NA         NA   4
## 2:                                              NA         NA   4
## 3: Welwyn Hatfield District (B) E07000241 51.76426 -0.2262538   6
## 4: Welwyn Hatfield District (B) E07000241 51.77312 -0.2233410   6
## 5: Welwyn Hatfield District (B) E07000241 51.77348 -0.2187323   6
## 6: Welwyn Hatfield District (B) E07000241 51.77302 -0.2255192   6
# sort datatable by column
dt[order(V4)] # ascending
##          V1      V2                    V3    V4  V5
##       1: GB     GY1         St Peter Port          
##       2: GB    GY10                  Sark          
##       3: GB     GY3                  Vale          
##       4: GB     GY4             St Martin          
##       5: GB     GY5                Castel          
##      ---                                           
## 1720669: GB     SY9           Cefn Einion Wales WLS
## 1720670: GB     SY9                   Lea Wales WLS
## 1720671: GB     SY9                  Bryn Wales WLS
## 1720672: GB     SY9                Linley Wales WLS
## 1720673: GB SY9 5JP Churchstoke Community Wales WLS
##                                V6        V7            V8        V9
##       1: Guernsey Channel Islands                                  
##       2:          Channel Islands                                  
##       3: Guernsey Channel Islands                                  
##       4: Guernsey Channel Islands                                  
##       5: Guernsey Channel Islands                                  
##      ---                                                           
## 1720669:               Shropshire       SHR                        
## 1720670:               Shropshire       SHR                        
## 1720671:               Shropshire       SHR                        
## 1720672:               Shropshire       SHR                        
## 1720673:            Powys - Powys W06000023 Powys - Powys W06000023
##               V10       V11 V12
##       1:       NA        NA   4
##       2:       NA        NA   4
##       3:       NA        NA   1
##       4:       NA        NA   4
##       5:       NA        NA   4
##      ---                       
## 1720669:       NA        NA   3
## 1720670:       NA        NA   4
## 1720671:       NA        NA   4
## 1720672:       NA        NA   4
## 1720673: 52.50256 -3.054109   6
dt[order(-V4, V3)] # descending V4, ascending V3
##          V1      V2                     V3    V4  V5
##       1: GB LD1 6PG Abbey Cwmhir Community Wales WLS
##       2: GB LD1 6PH Abbey Cwmhir Community Wales WLS
##       3: GB LD1 6PL Abbey Cwmhir Community Wales WLS
##       4: GB LD1 6PN Abbey Cwmhir Community Wales WLS
##       5: GB LD1 6PP Abbey Cwmhir Community Wales WLS
##      ---                                            
## 1720669: GB     IM7              The Cronk          
## 1720670: GB     IM9               The Howe          
## 1720671: GB     IM7               The Lhen          
## 1720672: GB     IM4            Union Mills          
## 1720673: GB     GY3                   Vale          
##                                V6        V7            V8        V9
##       1:            Powys - Powys W06000023 Powys - Powys W06000023
##       2:            Powys - Powys W06000023 Powys - Powys W06000023
##       3:            Powys - Powys W06000023 Powys - Powys W06000023
##       4:            Powys - Powys W06000023 Powys - Powys W06000023
##       5:            Powys - Powys W06000023 Powys - Powys W06000023
##      ---                                                           
## 1720669:              Isle of Man       IOM                        
## 1720670:              Isle of Man       IOM                        
## 1720671:              Isle of Man       IOM                        
## 1720672:              Isle of Man       IOM                        
## 1720673: Guernsey Channel Islands                                  
##               V10       V11 V12
##       1: 52.30944 -3.353440   6
##       2: 52.33015 -3.388163   6
##       3: 52.32855 -3.395935   6
##       4: 52.32428 -3.411208   6
##       5: 52.35929 -3.411780   6
##      ---                       
## 1720669:       NA        NA   3
## 1720670:       NA        NA   3
## 1720671:       NA        NA   3
## 1720672:       NA        NA   4
## 1720673:       NA        NA   1
# adding new columns
# instead of df$new_col <- df$var1 + df$var2 ....
dt[, new_col := V10 + V11] # use := insead of <-
names(dt)
##  [1] "V1"      "V2"      "V3"      "V4"      "V5"      "V6"      "V7"     
##  [8] "V8"      "V9"      "V10"     "V11"     "V12"     "new_col"
# re-code a variable
dt[V8 == "Aberdeen City", V8 := 'Abr City']
head(dt[, V8])
## [1] ""         "Abr City" "Abr City" "Abr City" "Abr City" "Abr City"
# deleting columns
dt[, c('V6', 'V7') := NULL] # back to using c()....

# using built-in functions
head(dt)
##    V1       V2                     V3       V4  V5       V8        V9
## 1: GB     AB10               Aberdeen Scotland SCT                   
## 2: GB AB10 1AB George St/Harbour Ward Scotland SCT Abr City S12000033
## 3: GB AB10 1AF George St/Harbour Ward Scotland SCT Abr City S12000033
## 4: GB AB10 1AG George St/Harbour Ward Scotland SCT Abr City S12000033
## 5: GB AB10 1AH George St/Harbour Ward Scotland SCT Abr City S12000033
## 6: GB AB10 1AL George St/Harbour Ward Scotland SCT Abr City S12000033
##         V10       V11 V12  new_col
## 1:       NA        NA   4       NA
## 2: 57.14961 -2.096916   6 55.05269
## 3: 57.14871 -2.097806   6 55.05090
## 4: 57.14907 -2.096997   6 55.05207
## 5: 57.14808 -2.094664   6 55.05342
## 6: 57.14954 -2.095412   6 55.05413
dt[, .(average = mean(V10, na.rm = TRUE)), by = V4] # store mean(V10) for each V4 in new column called 'average'
##                  V4  average
## 1:         Scotland 56.19868
## 2:          England 52.36342
## 3: Northern Ireland      NaN
## 4:            Wales 52.11765
## 5:                       NaN
# melting
aqdt <- as.data.table(airquality)
aqdt_melt <- data.table::melt(data = aqdt, 
                 id.vars = c("Month", "Day"),
                 measure.vars = c("Ozone", "Solar.R", "Wind", "Temp"),
                 variable.name = 'measurement',
                 value.name = 'reading')
## Warning in melt.data.table(data = aqdt, id.vars = c("Month", "Day"),
## measure.vars = c("Ozone", : 'measure.vars' [Ozone, Solar.R, Wind, Temp]
## are not all of the same type. By order of hierarchy, the molten data value
## column will be of type 'double'. All measure variables not of type 'double'
## will be coerced to. Check DETAILS in ?melt.data.table for more on coercion.
dim(aqdt)
## [1] 153   6
dim(aqdt_melt)
## [1] 612   4
# casting
data.table::dcast(data = aqdt_melt, Month + Day ~ measurement) # this 'undoes' the melt process above
## Using 'reading' as value column. Use 'value.var' to override
##      Month Day Ozone Solar.R Wind Temp
##   1:     5   1    41     190  7.4   67
##   2:     5   2    36     118  8.0   72
##   3:     5   3    12     149 12.6   74
##   4:     5   4    18     313 11.5   62
##   5:     5   5    NA      NA 14.3   56
##  ---                                  
## 149:     9  26    30     193  6.9   70
## 150:     9  27    NA     145 13.2   77
## 151:     9  28    14     191 14.3   75
## 152:     9  29    18     131  8.0   76
## 153:     9  30    20     223 11.5   68

Modeling

devtools::install_github('bi-sdal/sdalr', force = TRUE)
## Downloading GitHub repo bi-sdal/sdalr@master
## from URL https://api.github.com/repos/bi-sdal/sdalr/zipball/master
## Installing sdalr
## Installing DBI
## '/Library/Frameworks/R.framework/Resources/bin/R' --no-site-file  \
##   --no-environ --no-save --no-restore --quiet CMD INSTALL  \
##   '/private/var/folders/pn/6vp0b7x51x59kxvnb8dl_t1m0000gn/T/RtmpMH43Wg/devtools2901f6539a1/DBI'  \
##   --library='/Library/Frameworks/R.framework/Versions/3.3/Resources/library'  \
##   --install-tests
## 
## Installing maps
## '/Library/Frameworks/R.framework/Resources/bin/R' --no-site-file  \
##   --no-environ --no-save --no-restore --quiet CMD INSTALL  \
##   '/private/var/folders/pn/6vp0b7x51x59kxvnb8dl_t1m0000gn/T/RtmpMH43Wg/devtools2904955fdcf/maps'  \
##   --library='/Library/Frameworks/R.framework/Versions/3.3/Resources/library'  \
##   --install-tests
## 
## Installing tmaptools
## Installing classInt
## Installing e1071
## '/Library/Frameworks/R.framework/Resources/bin/R' --no-site-file  \
##   --no-environ --no-save --no-restore --quiet CMD INSTALL  \
##   '/private/var/folders/pn/6vp0b7x51x59kxvnb8dl_t1m0000gn/T/RtmpMH43Wg/devtools29022688908/e1071'  \
##   --library='/Library/Frameworks/R.framework/Versions/3.3/Resources/library'  \
##   --install-tests
## 
## '/Library/Frameworks/R.framework/Resources/bin/R' --no-site-file  \
##   --no-environ --no-save --no-restore --quiet CMD INSTALL  \
##   '/private/var/folders/pn/6vp0b7x51x59kxvnb8dl_t1m0000gn/T/RtmpMH43Wg/devtools2902c394c9e/classInt'  \
##   --library='/Library/Frameworks/R.framework/Versions/3.3/Resources/library'  \
##   --install-tests
## 
## Installing osmar
## Installing XML
## '/Library/Frameworks/R.framework/Resources/bin/R' --no-site-file  \
##   --no-environ --no-save --no-restore --quiet CMD INSTALL  \
##   '/private/var/folders/pn/6vp0b7x51x59kxvnb8dl_t1m0000gn/T/RtmpMH43Wg/devtools29041493f0/XML'  \
##   --library='/Library/Frameworks/R.framework/Versions/3.3/Resources/library'  \
##   --install-tests
## 
## '/Library/Frameworks/R.framework/Resources/bin/R' --no-site-file  \
##   --no-environ --no-save --no-restore --quiet CMD INSTALL  \
##   '/private/var/folders/pn/6vp0b7x51x59kxvnb8dl_t1m0000gn/T/RtmpMH43Wg/devtools29075654e78/osmar'  \
##   --library='/Library/Frameworks/R.framework/Versions/3.3/Resources/library'  \
##   --install-tests
## 
## Installing rgdal
## '/Library/Frameworks/R.framework/Resources/bin/R' --no-site-file  \
##   --no-environ --no-save --no-restore --quiet CMD INSTALL  \
##   '/private/var/folders/pn/6vp0b7x51x59kxvnb8dl_t1m0000gn/T/RtmpMH43Wg/devtools290c7bf052/rgdal'  \
##   --library='/Library/Frameworks/R.framework/Versions/3.3/Resources/library'  \
##   --install-tests
## 
## Installing rgeos
## '/Library/Frameworks/R.framework/Resources/bin/R' --no-site-file  \
##   --no-environ --no-save --no-restore --quiet CMD INSTALL  \
##   '/private/var/folders/pn/6vp0b7x51x59kxvnb8dl_t1m0000gn/T/RtmpMH43Wg/devtools2906e92abcc/rgeos'  \
##   --library='/Library/Frameworks/R.framework/Versions/3.3/Resources/library'  \
##   --install-tests
## 
## Installing rmapshaper
## Installing geojsonio
## Installing httr
## Installing curl
## '/Library/Frameworks/R.framework/Resources/bin/R' --no-site-file  \
##   --no-environ --no-save --no-restore --quiet CMD INSTALL  \
##   '/private/var/folders/pn/6vp0b7x51x59kxvnb8dl_t1m0000gn/T/RtmpMH43Wg/devtools2901f35cd86/curl'  \
##   --library='/Library/Frameworks/R.framework/Versions/3.3/Resources/library'  \
##   --install-tests
## 
## Installing mime
## '/Library/Frameworks/R.framework/Resources/bin/R' --no-site-file  \
##   --no-environ --no-save --no-restore --quiet CMD INSTALL  \
##   '/private/var/folders/pn/6vp0b7x51x59kxvnb8dl_t1m0000gn/T/RtmpMH43Wg/devtools2905d1061cb/mime'  \
##   --library='/Library/Frameworks/R.framework/Versions/3.3/Resources/library'  \
##   --install-tests
## 
## Installing openssl
## '/Library/Frameworks/R.framework/Resources/bin/R' --no-site-file  \
##   --no-environ --no-save --no-restore --quiet CMD INSTALL  \
##   '/private/var/folders/pn/6vp0b7x51x59kxvnb8dl_t1m0000gn/T/RtmpMH43Wg/devtools29033061d83/openssl'  \
##   --library='/Library/Frameworks/R.framework/Versions/3.3/Resources/library'  \
##   --install-tests
## 
## Installing R6
## '/Library/Frameworks/R.framework/Resources/bin/R' --no-site-file  \
##   --no-environ --no-save --no-restore --quiet CMD INSTALL  \
##   '/private/var/folders/pn/6vp0b7x51x59kxvnb8dl_t1m0000gn/T/RtmpMH43Wg/devtools2903b35d5d2/R6'  \
##   --library='/Library/Frameworks/R.framework/Versions/3.3/Resources/library'  \
##   --install-tests
## 
## '/Library/Frameworks/R.framework/Resources/bin/R' --no-site-file  \
##   --no-environ --no-save --no-restore --quiet CMD INSTALL  \
##   '/private/var/folders/pn/6vp0b7x51x59kxvnb8dl_t1m0000gn/T/RtmpMH43Wg/devtools29043ae0917/httr'  \
##   --library='/Library/Frameworks/R.framework/Versions/3.3/Resources/library'  \
##   --install-tests
## 
## Installing readr
## Installing BH
## '/Library/Frameworks/R.framework/Resources/bin/R' --no-site-file  \
##   --no-environ --no-save --no-restore --quiet CMD INSTALL  \
##   '/private/var/folders/pn/6vp0b7x51x59kxvnb8dl_t1m0000gn/T/RtmpMH43Wg/devtools2902206d290/BH'  \
##   --library='/Library/Frameworks/R.framework/Versions/3.3/Resources/library'  \
##   --install-tests
## 
## Installing hms
## '/Library/Frameworks/R.framework/Resources/bin/R' --no-site-file  \
##   --no-environ --no-save --no-restore --quiet CMD INSTALL  \
##   '/private/var/folders/pn/6vp0b7x51x59kxvnb8dl_t1m0000gn/T/RtmpMH43Wg/devtools290be72db0/hms'  \
##   --library='/Library/Frameworks/R.framework/Versions/3.3/Resources/library'  \
##   --install-tests
## 
## Installing Rcpp
## '/Library/Frameworks/R.framework/Resources/bin/R' --no-site-file  \
##   --no-environ --no-save --no-restore --quiet CMD INSTALL  \
##   '/private/var/folders/pn/6vp0b7x51x59kxvnb8dl_t1m0000gn/T/RtmpMH43Wg/devtools2901b76710a/Rcpp'  \
##   --library='/Library/Frameworks/R.framework/Versions/3.3/Resources/library'  \
##   --install-tests
## 
## Installing tibble
## Installing rlang
## '/Library/Frameworks/R.framework/Resources/bin/R' --no-site-file  \
##   --no-environ --no-save --no-restore --quiet CMD INSTALL  \
##   '/private/var/folders/pn/6vp0b7x51x59kxvnb8dl_t1m0000gn/T/RtmpMH43Wg/devtools290150a3508/rlang'  \
##   --library='/Library/Frameworks/R.framework/Versions/3.3/Resources/library'  \
##   --install-tests
## 
## '/Library/Frameworks/R.framework/Resources/bin/R' --no-site-file  \
##   --no-environ --no-save --no-restore --quiet CMD INSTALL  \
##   '/private/var/folders/pn/6vp0b7x51x59kxvnb8dl_t1m0000gn/T/RtmpMH43Wg/devtools2907e3ec6ac/tibble'  \
##   --library='/Library/Frameworks/R.framework/Versions/3.3/Resources/library'  \
##   --install-tests
## 
## '/Library/Frameworks/R.framework/Resources/bin/R' --no-site-file  \
##   --no-environ --no-save --no-restore --quiet CMD INSTALL  \
##   '/private/var/folders/pn/6vp0b7x51x59kxvnb8dl_t1m0000gn/T/RtmpMH43Wg/devtools2902226ae4c/readr'  \
##   --library='/Library/Frameworks/R.framework/Versions/3.3/Resources/library'  \
##   --install-tests
## 
## Installing V8
## '/Library/Frameworks/R.framework/Resources/bin/R' --no-site-file  \
##   --no-environ --no-save --no-restore --quiet CMD INSTALL  \
##   '/private/var/folders/pn/6vp0b7x51x59kxvnb8dl_t1m0000gn/T/RtmpMH43Wg/devtools29029e62c8f/V8'  \
##   --library='/Library/Frameworks/R.framework/Versions/3.3/Resources/library'  \
##   --install-tests
## 
## '/Library/Frameworks/R.framework/Resources/bin/R' --no-site-file  \
##   --no-environ --no-save --no-restore --quiet CMD INSTALL  \
##   '/private/var/folders/pn/6vp0b7x51x59kxvnb8dl_t1m0000gn/T/RtmpMH43Wg/devtools29037dff0d3/geojsonio'  \
##   --library='/Library/Frameworks/R.framework/Versions/3.3/Resources/library'  \
##   --install-tests
## 
## Installing geojsonlint
## Installing jsonvalidate
## '/Library/Frameworks/R.framework/Resources/bin/R' --no-site-file  \
##   --no-environ --no-save --no-restore --quiet CMD INSTALL  \
##   '/private/var/folders/pn/6vp0b7x51x59kxvnb8dl_t1m0000gn/T/RtmpMH43Wg/devtools290764b2dde/jsonvalidate'  \
##   --library='/Library/Frameworks/R.framework/Versions/3.3/Resources/library'  \
##   --install-tests
## 
## '/Library/Frameworks/R.framework/Resources/bin/R' --no-site-file  \
##   --no-environ --no-save --no-restore --quiet CMD INSTALL  \
##   '/private/var/folders/pn/6vp0b7x51x59kxvnb8dl_t1m0000gn/T/RtmpMH43Wg/devtools29043ed9a4a/geojsonlint'  \
##   --library='/Library/Frameworks/R.framework/Versions/3.3/Resources/library'  \
##   --install-tests
## 
## Skipping install of 'readr' from a cran remote, the SHA1 (1.1.1) has not changed since last install.
##   Use `force = TRUE` to force installation
## Skipping install of 'V8' from a cran remote, the SHA1 (1.5) has not changed since last install.
##   Use `force = TRUE` to force installation
## '/Library/Frameworks/R.framework/Resources/bin/R' --no-site-file  \
##   --no-environ --no-save --no-restore --quiet CMD INSTALL  \
##   '/private/var/folders/pn/6vp0b7x51x59kxvnb8dl_t1m0000gn/T/RtmpMH43Wg/devtools29068215bc7/rmapshaper'  \
##   --library='/Library/Frameworks/R.framework/Versions/3.3/Resources/library'  \
##   --install-tests
## 
## Installing spdep
## Installing coda
## '/Library/Frameworks/R.framework/Resources/bin/R' --no-site-file  \
##   --no-environ --no-save --no-restore --quiet CMD INSTALL  \
##   '/private/var/folders/pn/6vp0b7x51x59kxvnb8dl_t1m0000gn/T/RtmpMH43Wg/devtools29064e5a92f/coda'  \
##   --library='/Library/Frameworks/R.framework/Versions/3.3/Resources/library'  \
##   --install-tests
## 
## Installing deldir
## '/Library/Frameworks/R.framework/Resources/bin/R' --no-site-file  \
##   --no-environ --no-save --no-restore --quiet CMD INSTALL  \
##   '/private/var/folders/pn/6vp0b7x51x59kxvnb8dl_t1m0000gn/T/RtmpMH43Wg/devtools290312e0452/deldir'  \
##   --library='/Library/Frameworks/R.framework/Versions/3.3/Resources/library'  \
##   --install-tests
## 
## Installing expm
## '/Library/Frameworks/R.framework/Resources/bin/R' --no-site-file  \
##   --no-environ --no-save --no-restore --quiet CMD INSTALL  \
##   '/private/var/folders/pn/6vp0b7x51x59kxvnb8dl_t1m0000gn/T/RtmpMH43Wg/devtools29016afd6f3/expm'  \
##   --library='/Library/Frameworks/R.framework/Versions/3.3/Resources/library'  \
##   --install-tests
## 
## Installing gmodels
## Installing gdata
## '/Library/Frameworks/R.framework/Resources/bin/R' --no-site-file  \
##   --no-environ --no-save --no-restore --quiet CMD INSTALL  \
##   '/private/var/folders/pn/6vp0b7x51x59kxvnb8dl_t1m0000gn/T/RtmpMH43Wg/devtools290191489e2/gdata'  \
##   --library='/Library/Frameworks/R.framework/Versions/3.3/Resources/library'  \
##   --install-tests
## 
## '/Library/Frameworks/R.framework/Resources/bin/R' --no-site-file  \
##   --no-environ --no-save --no-restore --quiet CMD INSTALL  \
##   '/private/var/folders/pn/6vp0b7x51x59kxvnb8dl_t1m0000gn/T/RtmpMH43Wg/devtools2903851e739/gmodels'  \
##   --library='/Library/Frameworks/R.framework/Versions/3.3/Resources/library'  \
##   --install-tests
## 
## Installing LearnBayes
## '/Library/Frameworks/R.framework/Resources/bin/R' --no-site-file  \
##   --no-environ --no-save --no-restore --quiet CMD INSTALL  \
##   '/private/var/folders/pn/6vp0b7x51x59kxvnb8dl_t1m0000gn/T/RtmpMH43Wg/devtools290728e1d36/LearnBayes'  \
##   --library='/Library/Frameworks/R.framework/Versions/3.3/Resources/library'  \
##   --install-tests
## 
## '/Library/Frameworks/R.framework/Resources/bin/R' --no-site-file  \
##   --no-environ --no-save --no-restore --quiet CMD INSTALL  \
##   '/private/var/folders/pn/6vp0b7x51x59kxvnb8dl_t1m0000gn/T/RtmpMH43Wg/devtools2902b9f2ace/spdep'  \
##   --library='/Library/Frameworks/R.framework/Versions/3.3/Resources/library'  \
##   --install-tests
## 
## Skipping install of 'XML' from a cran remote, the SHA1 (3.98-1.9) has not changed since last install.
##   Use `force = TRUE` to force installation
## '/Library/Frameworks/R.framework/Resources/bin/R' --no-site-file  \
##   --no-environ --no-save --no-restore --quiet CMD INSTALL  \
##   '/private/var/folders/pn/6vp0b7x51x59kxvnb8dl_t1m0000gn/T/RtmpMH43Wg/devtools29036cdf2cb/tmaptools'  \
##   --library='/Library/Frameworks/R.framework/Versions/3.3/Resources/library'  \
##   --install-tests
## 
## '/Library/Frameworks/R.framework/Resources/bin/R' --no-site-file  \
##   --no-environ --no-save --no-restore --quiet CMD INSTALL  \
##   '/private/var/folders/pn/6vp0b7x51x59kxvnb8dl_t1m0000gn/T/RtmpMH43Wg/devtools2905c0e5153/bi-sdal-sdalr-68ab1ce'  \
##   --library='/Library/Frameworks/R.framework/Versions/3.3/Resources/library'  \
##   --install-tests
## 
data("anscombe")

summary(anscombe)
##        x1             x2             x3             x4    
##  Min.   : 4.0   Min.   : 4.0   Min.   : 4.0   Min.   : 8  
##  1st Qu.: 6.5   1st Qu.: 6.5   1st Qu.: 6.5   1st Qu.: 8  
##  Median : 9.0   Median : 9.0   Median : 9.0   Median : 8  
##  Mean   : 9.0   Mean   : 9.0   Mean   : 9.0   Mean   : 9  
##  3rd Qu.:11.5   3rd Qu.:11.5   3rd Qu.:11.5   3rd Qu.: 8  
##  Max.   :14.0   Max.   :14.0   Max.   :14.0   Max.   :19  
##        y1               y2              y3              y4        
##  Min.   : 4.260   Min.   :3.100   Min.   : 5.39   Min.   : 5.250  
##  1st Qu.: 6.315   1st Qu.:6.695   1st Qu.: 6.25   1st Qu.: 6.170  
##  Median : 7.580   Median :8.140   Median : 7.11   Median : 7.040  
##  Mean   : 7.501   Mean   :7.501   Mean   : 7.50   Mean   : 7.501  
##  3rd Qu.: 8.570   3rd Qu.:8.950   3rd Qu.: 7.98   3rd Qu.: 8.190  
##  Max.   :10.840   Max.   :9.260   Max.   :12.74   Max.   :12.500
sapply(1:4, function(x){
    cor(anscombe[, x], anscombe[, x+4])})
## [1] 0.8164205 0.8162365 0.8162867 0.8165214
summary(lm(y1 ~ x1, data = anscombe))
## 
## Call:
## lm(formula = y1 ~ x1, data = anscombe)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.92127 -0.45577 -0.04136  0.70941  1.83882 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)   3.0001     1.1247   2.667  0.02573 * 
## x1            0.5001     0.1179   4.241  0.00217 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.237 on 9 degrees of freedom
## Multiple R-squared:  0.6665, Adjusted R-squared:  0.6295 
## F-statistic: 17.99 on 1 and 9 DF,  p-value: 0.00217
data('anscombosaurus', package = 'sdalr')

summary(anscombosaurus)
##        x               y         
##  Min.   :22.31   Min.   : 2.949  
##  1st Qu.:44.10   1st Qu.:25.288  
##  Median :53.33   Median :46.026  
##  Mean   :54.26   Mean   :47.832  
##  3rd Qu.:64.74   3rd Qu.:68.526  
##  Max.   :98.21   Max.   :99.487
summary(lm(y~x, data = anscombosaurus))
## 
## Call:
## lm(formula = y ~ x, data = anscombosaurus)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -43.439 -23.213  -1.296  21.368  52.050 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  53.4530     7.6934   6.948 1.29e-10 ***
## x            -0.1036     0.1355  -0.764    0.446    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 26.98 on 140 degrees of freedom
## Multiple R-squared:  0.004157,   Adjusted R-squared:  -0.002957 
## F-statistic: 0.5844 on 1 and 140 DF,  p-value: 0.4459
plot(anscombosaurus$x, anscombosaurus$y)

library(ggplot2)
diamonds <- diamonds

quantile(diamonds$price, c(.10, .25, .50, .75))
##     10%     25%     50%     75% 
##  646.00  950.00 2401.00 5324.25
economics <- economics

cor(economics$pce, economics$psavert)
## [1] -0.837069
econ_subset <- economics[, c('pce', 'psavert', 'uempmed', 'unemploy')]

library(GGally)
## Warning: package 'GGally' was built under R version 3.3.2
GGally::ggpairs(data = econ_subset)

m <- glm(price ~ carat, data=diamonds)
summary(m)
## 
## Call:
## glm(formula = price ~ carat, data = diamonds)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -18585.3    -804.8     -18.9     537.4   12731.7  
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -2256.36      13.06  -172.8   <2e-16 ***
## carat        7756.43      14.07   551.4   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 2398044)
## 
##     Null deviance: 8.5847e+11  on 53939  degrees of freedom
## Residual deviance: 1.2935e+11  on 53938  degrees of freedom
## AIC: 945467
## 
## Number of Fisher Scoring iterations: 2
m <- glm(price ~ carat + table + as.factor(color), data=diamonds)
summary(m)
## 
## Call:
## glm(formula = price ~ carat + table + as.factor(color), data = diamonds)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -18514.1    -756.5     -75.4     552.9   12156.4  
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         1611.744    163.349   9.867  < 2e-16 ***
## carat               8134.005     14.179 573.662  < 2e-16 ***
## table                -76.013      2.868 -26.503  < 2e-16 ***
## as.factor(color).L -1581.839     22.177 -71.327  < 2e-16 ***
## as.factor(color).Q  -731.790     20.270 -36.102  < 2e-16 ***
## as.factor(color).C  -114.228     19.024  -6.004 1.93e-09 ***
## as.factor(color)^4    72.067     17.471   4.125 3.71e-05 ***
## as.factor(color)^5  -140.016     16.517  -8.477  < 2e-16 ***
## as.factor(color)^6  -174.730     14.983 -11.662  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 2137611)
## 
##     Null deviance: 8.5847e+11  on 53939  degrees of freedom
## Residual deviance: 1.1528e+11  on 53931  degrees of freedom
## AIC: 939272
## 
## Number of Fisher Scoring iterations: 2
library(broom)
## Warning: package 'broom' was built under R version 3.3.2
broom::tidy(m)$estimate
## [1]  1611.74365  8134.00465   -76.01264 -1581.83911  -731.78962  -114.22765
## [7]    72.06672  -140.01590  -174.73000
# broom::augment(m)
broom::glance(m)
##   null.deviance df.null    logLik      AIC      BIC     deviance
## 1  858473135517   53939 -469626.2 939272.4 939361.3 115283523794
##   df.residual
## 1       53931
plot(m)

# do not want to see a pattern in the residual plot
# QQ plot should show straight line (depending on distribution of QQ plot)

mpg_bin <- function(mpg) {
    if (mpg > 22) {
        return('good')
    } else {
        return('poor')
    }
}

mpg_bin_int <- function(mpg) {
    if (mpg > 22) {
        return(1)
    } else {
        return(0)
    }
}

mtcars$mpg_bin <- sapply(X = mtcars$mpg, FUN = mpg_bin)
mtcars$mpg_bin_int <- sapply(X = mtcars$mpg, FUN = mpg_bin_int)
head(mtcars)
##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1
##                   mpg_bin mpg_bin_int
## Mazda RX4            poor           0
## Mazda RX4 Wag        poor           0
## Datsun 710           good           1
## Hornet 4 Drive       poor           0
## Hornet Sportabout    poor           0
## Valiant              poor           0
m <- glm(formula = mpg_bin_int ~ hp + wt, data = mtcars, family = binomial(link = 'logit'))
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(m)
## 
## Call:
## glm(formula = mpg_bin_int ~ hp + wt, family = binomial(link = "logit"), 
##     data = mtcars)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1.72029  -0.00913  -0.00001   0.00314   1.40334  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept)  30.4063    18.0320   1.686   0.0917 .
## hp           -0.2201     0.1447  -1.521   0.1283  
## wt           -3.1801     1.9659  -1.618   0.1057  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 38.0243  on 31  degrees of freedom
## Residual deviance:  6.5068  on 29  degrees of freedom
## AIC: 12.507
## 
## Number of Fisher Scoring iterations: 10
results <- broom::tidy(m)
results$or <- exp(results$estimate)
results
##          term   estimate  std.error statistic    p.value           or
## 1 (Intercept) 30.4062991 18.0320177  1.686239 0.09174969 1.604309e+13
## 2          hp -0.2200611  0.1446913 -1.520901 0.12828471 8.024697e-01
## 3          wt -3.1801489  1.9658545 -1.617693 0.10572880 4.157947e-02
library(survival)
head(heart) # about people on waiting list for Harvard heart transplant
##   start stop event        age      year surgery transplant id
## 1     0   50     1 -17.155373 0.1232033       0          0  1
## 2     0    6     1   3.835729 0.2546201       0          0  2
## 3     0    1     0   6.297057 0.2655715       0          0  3
## 4     1   16     1   6.297057 0.2655715       0          1  3
## 5     0   36     0  -7.737166 0.4900753       0          0  4
## 6    36   39     1  -7.737166 0.4900753       0          1  4
cox_model <- coxph(Surv(heart$start, heart$stop, heart$event) ~ age + year + as.factor(surgery) + as.factor(transplant), data = heart)
summary(cox_model)
## Call:
## coxph(formula = Surv(heart$start, heart$stop, heart$event) ~ 
##     age + year + as.factor(surgery) + as.factor(transplant), 
##     data = heart)
## 
##   n= 172, number of events= 75 
## 
##                            coef exp(coef) se(coef)      z Pr(>|z|)  
## age                     0.02717   1.02754  0.01371  1.981   0.0476 *
## year                   -0.14635   0.86386  0.07047 -2.077   0.0378 *
## as.factor(surgery)1    -0.63721   0.52877  0.36723 -1.735   0.0827 .
## as.factor(transplant)1 -0.01025   0.98980  0.31375 -0.033   0.9739  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                        exp(coef) exp(-coef) lower .95 upper .95
## age                       1.0275     0.9732    1.0003    1.0555
## year                      0.8639     1.1576    0.7524    0.9918
## as.factor(surgery)1       0.5288     1.8912    0.2574    1.0860
## as.factor(transplant)1    0.9898     1.0103    0.5352    1.8307
## 
## Concordance= 0.636  (se = 0.037 )
## Rsquare= 0.084   (max possible= 0.969 )
## Likelihood ratio test= 15.11  on 4 df,   p=0.004476
## Wald test            = 14.49  on 4 df,   p=0.005877
## Score (logrank) test = 15.03  on 4 df,   p=0.004631
plot(survfit(cox_model)) # kaplan meyer curve (everyone is alive on day zero, as the number of days increase, the solid line shows you how many people are left in your population)

m1 <- glm(price ~ carat, data = diamonds)
m2 <- glm(price ~ carat + as.factor(cut), data = diamonds)
m3 <- glm(price ~ carat + as.factor(cut) + depth, data = diamonds)
m4 <- glm(price ~ carat + as.factor(cut) + depth + table, data = diamonds)

# ways of comparing models
anova(m1, m2, m3, m4)
## Analysis of Deviance Table
## 
## Model 1: price ~ carat
## Model 2: price ~ carat + as.factor(cut)
## Model 3: price ~ carat + as.factor(cut) + depth
## Model 4: price ~ carat + as.factor(cut) + depth + table
##   Resid. Df Resid. Dev Df   Deviance
## 1     53938 1.2935e+11              
## 2     53934 1.2321e+11  4 6133201436
## 3     53933 1.2297e+11  1  246635400
## 4     53932 1.2270e+11  1  264127832
# AIC and BIC put penalty on every new variable you add to a model
# looking for the smallest number to identify best model
AIC(m1, m2, m3, m4)
##    df      AIC
## m1  3 945466.5
## m2  7 942854.2
## m3  8 942748.1
## m4  9 942634.2
BIC(m1, m2, m3, m4)
##    df      BIC
## m1  3 945493.2
## m2  7 942916.5
## m3  8 942819.3
## m4  9 942714.2
# cross-validation (cv) prediction error for generalized linear models (glm)
library(boot) 
## 
## Attaching package: 'boot'
## The following object is masked from 'package:survival':
## 
##     aml
cv1 <- cv.glm(data = diamonds, glmfit = m1, K = 5) # K = number of partitions you want your data in (5 and 10 are popular)
cv2 <- cv.glm(data = diamonds, glmfit = m2, K = 5)
cv3 <- cv.glm(data = diamonds, glmfit = m3, K = 5)
cv4 <- cv.glm(data = diamonds, glmfit = m4, K = 5)

# print errors from all cross validations; pick the one with the smallest errors
cv1$delta
## [1] 2398543 2398478
cv2$delta
## [1] 2285631 2285478
cv3$delta
## [1] 2280494 2280403
cv4$delta
## [1] 2275951 2275821

Bayesian Statistics

# commands to run a simple 1-d mcmc.

# number of mcmc scans
Niter <- 1000  

# logdens = log density
logdens1 <- function(x){
 # returns the log of the standard normal density 
 # the density is not normalized here
 return(-.5*x^2)
}

# build a vector to store the mcmc output
x <- rep(0,Niter)

# initialize x[1] with a starting value
x[1] <- 20.0

# compute the log of the posterior density for x[1]
logpost <- logdens1(x[1])

# width of metropolis proposal 
a <- 3 # use a uniform U[-a,a] metropolis proposal (give or take 3)

# carry out the Metropolis sampling
for(i in 2:Niter){
  can = x[i-1] + runif(1,-a,a) # generate candidate centered at old value, give or take a
  logpostcan <- logdens1(can) # generate the posterior density of that
  u <- runif(1) # generate uniform to tell us whether or not we accept this value
  if(log(u) < (logpostcan - logpost)){ # if the uniform is < ratio of new/old posterior, then accept (leave current value at the old value)
     # accept the proposal
    x[i] <- can
    logpost <- logpostcan # update the posterior density
  } else{
  x[i] <- x[i-1] # set the new value to the old value; leave logpost as is.
  }
}

# make a plot of the output
# use par to tell R to plot a single image, and set the margin area
par(mfrow=c(1,2),oma=c(0,0,0,0))#,matlabr=c(4,4,1,1))
plot(1:Niter,x,type='l')
hist(x[-(1:100)],main=paste('a =',a,sep=''),xlab='x',probability=T)
lines(seq(-3,3,length=200),dnorm(seq(-3,3,length=200)))

browser()
## Called from: eval(expr, envir, enclos)
# now lets look at what happens if we change a and the starting value.
# it'll be easier to make the mcmc iterations a function

mcmc1d <- function(x0,Niter,a,LOGDENS){
 # run a metropolis chain using U(x-a,x+a) proposals
 # x0 is the starting value
 # Niter is the number of scans in this metropolis chain
 # LOGDENS is the log of the density from which we want draws.
 
 # the function returns a vector holding the output from the Metropolis sample
 x <- rep(0,Niter)

 # initialize x[1] with a starting value
 x[1] <- x0

 # compute the log of the posterior density for x[1]
 logpost <- LOGDENS(x[1])

 # carry out the Metropolis sampling
 for(i in 2:Niter){
   can = x[i-1] + runif(1,-a,a)
   logpostcan <- LOGDENS(can)
   u <- runif(1)
   if(log(u) < (logpostcan - logpost)){
      # accept the proposal
     x[i] <- can
     logpost <- logpostcan
   } else{
   x[i] <- x[i-1] # set the new value to the old value; leave logpost as is.
   }
 }
 return(x)
}

# first, let's look at the effect of changing the starting value
# we'll look at a=20,10,5,0
# this should motivate ideas about "burn-in":  Should we discard
# some of the initial piece of the chain so that our estimates
# are more accurate?

x1 <- mcmc1d(20,1000,3,logdens1)
x2 <- mcmc1d(10,1000,3,logdens1)
x3 <- mcmc1d(5,1000,3,logdens1)
x4 <- mcmc1d(0,1000,3,logdens1)

# look at some plots

par(mfrow=c(2,2),oma=c(0,0,0,0),mar=c(4,4,1,1))
plot(1:Niter,x1,ylim=c(-3,21),type='l')
plot(1:Niter,x2,ylim=c(-3,21),type='l')
plot(1:Niter,x3,ylim=c(-3,21),type='l')
plot(1:Niter,x4,ylim=c(-3,21),type='l')

browser()
# now, let's look at the effect of changing the proposal width a
# we'll look at a=20,10,3,0.1
x1 <- mcmc1d(20,1000,20,logdens1)
x2 <- mcmc1d(20,1000,3,logdens1)
x3 <- mcmc1d(20,1000,1,logdens1)
x4 <- mcmc1d(20,1000,.1,logdens1)

# look at some plots

 # different widths - no labels
par(mfrow=c(2,2),oma=c(0,0,0,0),mar=c(4,4,1,1))
plot(1:Niter,x1,ylim=c(-3,21),type='l',xlab='iteration')
plot(1:Niter,x2,ylim=c(-3,21),type='l',xlab='iteration')
plot(1:Niter,x3,ylim=c(-3,21),type='l',xlab='iteration')
plot(1:Niter,x4,ylim=c(-3,21),type='l',xlab='iteration')

browser()

 # show the labels
par(mfrow=c(2,2),oma=c(0,0,0,0),mar=c(4,4,1,1))
plot(1:Niter,x1,ylim=c(-3,21),type='l',xlab='iteration')
text(700,17,'a=20')
plot(1:Niter,x2,ylim=c(-3,21),type='l',xlab='iteration')
text(700,17,'a=3')
plot(1:Niter,x3,ylim=c(-3,21),type='l',xlab='iteration')
text(700,17,'a=1')
plot(1:Niter,x4,ylim=c(-3,21),type='l',xlab='iteration')
text(700,17,'a=.1')

# LESSON: a high (bad?) a is good to get you near the distribution quickly, but once you're close to it it doesn't perform as well as a lower a.

browser()

# now, let's look at the effect of changing the density
# we'll look at a mixture of normals
# a question we can look at is what width is best for
# sampling from this density via the metropolis algorithm
# 

logdens2 <- function(x){
 # returns a mixture of normals
 # .6*N(mu=-1,sd=.4) + .4*N(mu=.8,sd=.4)
 # the density is logged
 dens <- .6*dnorm(x,-1,.4)+.4*dnorm(x,.8,.4)
 return(log(dens))
}

logdens3 <- function(x){
 # returns a mixture of normals
 # .6*N(mu=-1,sd=.4) + .4*U(0,1)
 # the density is logged
 dens <- .6*dnorm(x,-1,.4)+.4*(x>0)*(x<1)
 return(log(dens))
}

logdens4 <- function(x){
 # returns a mixture of normals
 # .6*U[-1.2,-.2] + .4*dnorm(x,1,.3)
 # the density is logged
 dens <- .6*(x > -1.2)*(x < -.2) +.4*dnorm(x,1,.3)
 return(log(dens))
}

browser()
# a plot of the density
x <- seq(-3,3,length=200)
par(mfrow=c(1,1),oma=c(0,0,0,0),mar=c(4,4,1,1))
plot(x,exp(logdens2(x)),type='l')

browser()

x1 <- mcmc1d(2,1000,20,logdens2)
x2 <- mcmc1d(2,1000,3,logdens2)
x3 <- mcmc1d(2,1000,1,logdens2)
x4 <- mcmc1d(2,1000,.2,logdens2)

# look at some plots

par(mfrow=c(2,2),oma=c(0,0,0,0),mar=c(4,4,1,1))
plot(1:Niter,x1,ylim=c(-3,5),type='l',xlab='iteration')
text(700,4,'a=20')
plot(1:Niter,x2,ylim=c(-3,5),type='l',xlab='iteration')
text(700,4,'a=3')
plot(1:Niter,x3,ylim=c(-3,5),type='l',xlab='iteration')
text(700,4,'a=1')
plot(1:Niter,x4,ylim=c(-3,5),type='l',xlab='iteration')
text(700,4,'a=.2')

browser()

par(mfrow=c(2,2),oma=c(0,0,0,0),mar=c(4,4,1,1))
hist(x1[-(1:100)],probability=TRUE,ylim=c(0,.75),xlim=c(-3,3),xlab='x',
  main='a=20')
lines(x,exp(logdens2(x)))
hist(x2[-(1:100)],probability=TRUE,ylim=c(0,.75),xlim=c(-3,3),xlab='x',
  main='a=3')
lines(x,exp(logdens2(x)))
hist(x3[-(1:100)],probability=TRUE,ylim=c(0,.75),xlim=c(-3,3),xlab='x',
  main='a=1')
lines(x,exp(logdens2(x)))
hist(x4[-(1:100)],probability=TRUE,ylim=c(0,.75),xlim=c(-3,3),xlab='x',
  main='a=.2')
lines(x,exp(logdens2(x)))

browser()

#
# for the students, they can look at the mixture of normals problem.  What is
# their estimate for the P(x > 1.5)?  What is their choice for proposal width a?
# Did you discard any of the initial part of the chain?
#